library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
library(purrr)
library(plm)
library(lmtest)
library(sandwich)
library(ordinal)
library(ggplot2)

#####################
## Data Processing ##
#####################
# Load the V-Dem country-year data unless it is already in the workspace.
# `vdem` is required further down to build `vdem.sel`; the guard avoids
# re-reading the file when it was loaded earlier in the session.
if (!exists("vdem")) {
  vdem <- read.csv("V-Dem-CY-Full+Others-v14.csv")
}

# Load the two SCAD regional event files.
scad <- read.csv("SCAD2018Africa_Final.csv")
scad2 <- read.csv("SCAD2018LatinAmerica_Final.csv")

# Rename column 38 of the Africa file before stacking: rbind() on data frames
# requires the column names of both inputs to match.
# NOTE(review): presumably column 38 is the only name that differs between the
# two files -- confirm; a hard-coded position breaks if the layout changes.
names(scad)[38] <- "lgbtq_issue"
scad <- rbind(scad, scad2)

# Restrict the event data in one pass:
#   1. keep rows with sublocal == 1,
#   2. flag events whose etype is one of 1..6 and keep only those,
#   3. recode the -99 sentinel in npart to NA.
# An additional filter on cgovtarget == 1 is deliberately left disabled:
#   filter(cgovtarget == 1)
scad <- scad %>%
  filter(sublocal == 1) %>%
  mutate(protest = as.numeric(etype %in% 1:6)) %>%
  filter(protest == 1) %>%
  mutate(npart = replace(npart, which(npart == -99), NA))

# Keep only the V-Dem columns used downstream. COWcode is renamed to `ccode`
# and `year` is copied into `styr` so that both join keys exist under the
# names used in the event data; country_name is renamed to `country`.
vdem.sel <- vdem %>%
  dplyr::select(
    country_id,
    country = country_name,
    year,
    ccode = COWcode,
    v2xel_locelec,
    v2x_polyarchy,
    e_gdppc,
    e_pop
  ) %>%
  dplyr::mutate(styr = year)

# Attach the country-year covariates to every event row.
scad <- scad %>%
  left_join(vdem.sel, by = c("ccode", "styr"))

# Parse start and end dates written as day, abbreviated month name, two-digit
# year. `%b` is matched against the current locale's month abbreviations.
scad$startdate <- as.Date(scad$startdate, format = "%d-%b-%y")
scad$enddate <- as.Date(scad$enddate, format = "%d-%b-%y")

# Date each event at the midpoint between its start and end.
# The half-duration is floored to whole days: a fractional Date prints like
# the whole-valued date of the same day but is a different value, so it would
# form a separate (elocal, startdate) group later on and produce duplicated
# index labels once the dates are turned into a panel index.
scad <- scad %>%
  mutate(
    event_date = startdate +
      floor(as.numeric(difftime(enddate, startdate, units = "days")) / 2)
  )

# From here on `startdate` holds the midpoint date.
scad$startdate <- scad$event_date

# Negative ndeath values are recoded to NA. This has to happen BEFORE the
# fatality histories are built, otherwise the negative codes are averaged in
# as if they were real death counts.
scad$ndeath[scad$ndeath < 0] <- NA

# For every element i of a date-ordered series, return the weighted mean of
# `values` over all earlier elements (1 .. i-1); the first element has no
# history and gets NA.
#   dates     Date vector, already sorted ascending.
#   values    numeric vector aligned with `dates`.
#   weight_fn function mapping the gaps in days between event i and each
#             earlier event to non-negative weights.
prior_weighted_mean <- function(dates, values, weight_fn) {
  vapply(seq_along(dates), function(i) {
    if (i == 1L) {
      return(NA_real_)
    }
    earlier <- seq_len(i - 1L)
    gap_days <- as.numeric(difftime(dates[i], dates[earlier], units = "days"))
    weighted.mean(values[earlier], w = weight_fn(gap_days), na.rm = TRUE)
  }, numeric(1))
}

# Linear weights: 1 for a gap of zero days, falling to 0 at the largest gap.
# NOTE(review): the oldest prior event always receives weight 0, so with a
# single prior event the weights sum to 0 and the result is NaN -- confirm
# this is intended.
linear_recency_weights <- function(gap_days) {
  1 - (gap_days / max(gap_days))
}

# Define decay parameter: a weight halves every `half_life` days.
half_life <- 730
lambda <- log(2) / half_life

# Exponential decay weights.
exp_recency_weights <- function(gap_days) {
  exp(-lambda * gap_days)
}

# Build both fatality histories per location in a single sorted, grouped pass.
scad <- scad %>%
  arrange(elocal, startdate) %>%
  group_by(elocal) %>%
  mutate(
    cumulated_fatal_weighted =
      prior_weighted_mean(startdate, ndeath, linear_recency_weights),
    cumulated_fatal_weighted_exp =
      prior_weighted_mean(startdate, ndeath, exp_recency_weights)
  ) %>%
  ungroup()

scad <- as.data.frame(scad)

# Collapse to one row per location and date. Participants take the group
# maximum (NA when every value in the group is missing); all other variables
# take the group mean with missing values ignored. e_gdppc and e_pop are
# stored under the shorter names gdppc and pop.
scad_meaned <- scad %>%
  group_by(elocal, startdate) %>%
  dplyr::summarise(
    participants_max = if (any(!is.na(npart))) {
      max(npart, na.rm = TRUE)
    } else {
      NA_real_
    },
    across(c(ndeath, v2x_polyarchy), ~ mean(.x, na.rm = TRUE)),
    gdppc = mean(e_gdppc, na.rm = TRUE),
    pop = mean(e_pop, na.rm = TRUE),
    across(
      c(cumulated_fatal_weighted, cumulated_fatal_weighted_exp, v2xel_locelec),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )

# Derive the fatality-deviation measures and the next event's participation.
scad_final <- scad_meaned %>%
  arrange(elocal, startdate) %>%
  group_by(elocal) %>%
  mutate(
    # 1 when this event's deaths exceed the location's weighted history.
    fatality_dummy_weighted = ifelse(ndeath > cumulated_fatal_weighted, 1, 0),
    fatality_diff_weighted = ndeath - cumulated_fatal_weighted,
    # The exponential history is built from earlier events only, so it is
    # compared directly, exactly like the linear variant above. Lagging it
    # would drop the most recent prior event from the baseline.
    fatality_diff_weighted_exp = ndeath - cumulated_fatal_weighted_exp,
    # Participation at the next event in the same location.
    participants_lead = dplyr::lead(participants_max)
  ) %>%
  ungroup() %>%
  mutate(
    # Signed log transform: keeps the sign, compresses the magnitude.
    fatality_diff_signed_weighted =
      sign(fatality_diff_weighted) * log1p(abs(fatality_diff_weighted)),
    fatality_diff_signed_weighted_exp =
      sign(fatality_diff_weighted_exp) * log1p(abs(fatality_diff_weighted_exp))
  )

# Add log(1 + x) versions: deaths go to a new column, while gdppc and pop are
# overwritten in place.
scad_final <- scad_final %>%
  mutate(
    ndeath_log = log1p(ndeath),
    gdppc = log1p(gdppc),
    pop = log1p(pop)
  )

# Declare the panel structure (unit = elocal, time = startdate) and write the
# result to disk.
pdata <- pdata.frame(scad_final, index = c("elocal", "startdate"))

write.csv(pdata, "scad_local_final.csv", row.names = FALSE)
